import requests
from bs4 import BeautifulSoup
from numpy import *
import parsel
import random
import time
import csv
import re
from lxml import etree




# Scrape the 2017-2022 monthly housing prices for Feixi from anjuke.com and
# write them to a CSV file: one row per month, with the yearly average price
# stored only on the first row of each year.

# Destination CSV file.
OUTPUT_PATH = 'F:/肥西2017-2022年房价.csv'
# CSV columns: year, month, monthly price (raw page text), yearly average.
FIELDNAMES = ['年份', '月份', '每月价格', '均价']

# Request headers: a desktop-browser User-Agent sent with every page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
}

# Source page for one year; the placeholder receives the four-digit year.
URL_TEMPLATE = 'https://www.anjuke.com/fangjia/hf{}/feixixiancheng/'
# XPath of the month labels and of the matching price texts on a year page.
MONTH_XPATH = "//div[@class='fjlist-box boxstyle2']//a[@class='nostyle']/b/text()"
PRICE_XPATH = "//div[@class='fjlist-box boxstyle2']//a[@class='nostyle']/span/text()"

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def parse_price(text):
    """Return the first run of digits in *text* as an int, or None if absent.

    Extracting the digits with a regex (instead of a fixed five-character
    slice) keeps four-digit and six-digit prices from failing or being
    truncated.
    """
    match = re.search(r'\d+', text)
    return int(match.group()) if match else None


def average_price(prices):
    """Return the mean of the non-None *prices* rounded to 2 decimals.

    Returns None when there is no usable price, so an empty page cannot
    cause a division by zero.
    """
    usable = [price for price in prices if price is not None]
    if not usable:
        return None
    total = 0
    for price in usable:
        total += price
    # float() yields a plain Python float whichever round() is in scope
    # (the file star-imports numpy, which may shadow the builtin).
    return float(round(total / len(usable), 2))


def build_rows(year, month_labels, raw_prices):
    """Build the CSV row dicts for one year.

    Args:
        year: value written to the year column of every row.
        month_labels: month label texts scraped from the page.
        raw_prices: price texts scraped from the page, parallel to
            *month_labels*; written unchanged to the monthly-price column.

    Returns:
        One dict per (label, price) pair. Only the first row carries the
        yearly average; the remaining rows hold None in that column.
        Pairing with zip() means mismatched list lengths are truncated
        instead of raising IndexError.
    """
    average = average_price([parse_price(text) for text in raw_prices])
    return [
        {
            '年份': year,
            # Characters 5-6 of the label are taken as the month; assumes
            # labels look like 'YYYY年MM月' -- TODO confirm against the page.
            '月份': label[5:7],
            '每月价格': raw_price,
            '均价': average if index == 0 else None,
        }
        for index, (label, raw_price) in enumerate(zip(month_labels, raw_prices))
    ]


def fetch_year(year):
    """Download the page for *year* and return (month_labels, raw_prices)."""
    response = requests.get(
        url=URL_TEMPLATE.format(year), headers=headers, timeout=REQUEST_TIMEOUT)
    tree = etree.HTML(response.text)
    return tree.xpath(MONTH_XPATH), tree.xpath(PRICE_XPATH)


def main():
    """Scrape every year from 2017 to 2022 and write the rows to OUTPUT_PATH."""
    # The with-block guarantees the file is flushed and closed, even when a
    # request or parse step raises part-way through.
    with open(OUTPUT_PATH, 'w', encoding='utf-8-sig', newline="") as f:
        csv_write = csv.DictWriter(f, fieldnames=FIELDNAMES)
        csv_write.writeheader()  # header row

        for year in range(2017, 2023):
            print('爬取{}年'.format(year))
            month_labels, raw_prices = fetch_year(year)
            rows = build_rows(year, month_labels, raw_prices)
            if not rows:
                # Nothing matched the XPath queries (layout change, blocked
                # request, ...): report it and move on instead of crashing.
                print('第{}年没有解析到数据，已跳过'.format(year))
                continue
            csv_write.writerows(rows)
            print('第{}年的数据加载完成'.format(year))

    print('--------------------------')
    print('全部爬取成功！')


if __name__ == '__main__':
    main()
